In [1]:
# Standard-library helpers used to obtain the connection settings.
import getpass
import os

# Load the swat package and turn off note messages
import swat

swat.options.cas.print_messages = False

# Connection settings: host, port, username, password.
# These names were previously used without being defined, which raises
# NameError on a fresh kernel. Read them from the environment and prompt for
# anything that is missing, so that no credential is stored in the notebook.
host = os.environ.get("CASHOST") or input("CAS host: ")
port = int(os.environ.get("CASPORT") or input("CAS port: "))
username = os.environ.get("CASUSER") or input("CAS username: ")
password = os.environ.get("CASPASSWORD") or getpass.getpass("CAS password: ")

# Open the CAS session; the cells below all go through `s`.
s = swat.CAS(host, port, username, password)
In [2]:
# Ask the server for its action set information (the `loaded` column shows
# which ones are loaded). Ending the cell with the result renders it as output.
loaded_actionsets = s.builtins.actionSetInfo()
loaded_actionsets
Out[2]:
§ setinfo
Action set information
actionset
label
loaded
extension
build_time
portdate
0
accessControl
Access Controls
1
tkacon
2016-09-07 18:41:50
V.03.01M0P09072016
1
accessControl
Access Controls
1
casmeta
2016-09-07 18:41:50
V.03.01M0P09072016
2
builtins
System
1
tkcasablt
2016-09-07 18:41:50
V.03.01M0P09072016
3
configuration
Server Properties
1
tkcascfg
2016-09-07 18:41:47
V.03.01M0P09072016
4
dataPreprocess
Data Preprocess
1
tktrans
2016-09-07 18:41:49
V.03.01M0P09072016
5
dataStep
DATA Step
1
datastep
2016-09-07 18:41:27
V.03.01M0P09072016
6
percentile
Percentile
1
tkcasptl
2016-09-07 18:41:49
V.03.01M0P09072016
7
search
Search
1
casidx
2016-09-07 18:12:22
V.03.01M0P09072016
8
session
Session Methods
1
tkcsessn
2016-09-07 18:41:49
V.03.01M0P09072016
9
sessionProp
Session Properties
1
tkcstate
2016-09-07 18:41:49
V.03.01M0P09072016
10
simple
Analytics
1
tkimstat
2016-09-07 18:41:49
V.03.01M0P09072016
11
table
Tables
1
tkcastab
2016-09-13 12:24:04
V.03.01M0P09072016
elapsed 0.00676s · user 0.007s · sys 0.007s · mem 0.277MB
In [3]:
# With no arguments, help() lists every action set together with its
# available actions; the result is an ordered dict keyed by action set.
action_catalog = s.help()
action_catalog
Out[3]:
§ accessControl
name
description
0
assumeRole
Assumes a role
1
dropRole
Relinquishes a role
2
showRolesIn
Shows the currently active role
3
showRolesAllowed
Shows the roles that a user is a member of
4
isInRole
Shows whether a role is assumed
5
isAuthorized
Shows whether access is authorized
6
isAuthorizedActions
Shows whether access is authorized to actions
7
isAuthorizedTables
Shows whether access is authorized to tables
8
isAuthorizedColumns
Shows whether access is authorized to columns
9
listAllPrincipals
Lists all principals that have explicit access...
10
whatIsEffective
Lists effective access and explanations (Origins)
11
listAcsData
Lists access controls for caslibs, tables, and...
12
listAcsActionSet
Lists access controls for an action or action set
13
repAllAcsCaslib
Replaces all access controls for a caslib
14
repAllAcsTable
Replaces all access controls for a table
15
repAllAcsColumn
Replaces all access controls for a column
16
repAllAcsActionSet
Replaces all access controls for an action set
17
repAllAcsAction
Replaces all access controls for an action
18
updSomeAcsCaslib
Adds, deletes, and modifies some access contro...
19
updSomeAcsTable
Adds, deletes, and modifies some access contro...
20
updSomeAcsColumn
Adds, deletes, and modifies some access contro...
21
updSomeAcsActionSet
Adds, deletes, and modifies some access contro...
22
updSomeAcsAction
Adds, deletes, and modifies some access contro...
23
remAllAcsData
Removes all access controls for a caslib, tabl...
24
remAllAcsActionSet
Removes all access controls for an action set ...
25
operTableMd
Adds, deletes, and modifies table metadata
26
operColumnMd
Adds, deletes, and modifies column metadata
27
operActionSetMd
Adds, deletes, and modifies action set metadata
28
operActionMd
Adds, deletes, and modifies action metadata
29
operAdminMd
Assigns users and groups to roles and modifies...
30
listMetadata
Lists the metadata for caslibs, tables, column...
31
persistMetadata
Persists the access control metadata
32
createBackup
Creates a backup if one is not in progress
33
completeBackup
Flags a backup as complete
34
operBWPaths
Configures a blacklist or whitelist of paths
35
deleteBWList
Deletes a blacklist or a whitelist
§ builtins
name
description
0
addNode
Adds a machine to the server
1
removeNode
Remove one or more machines from the server
2
help
Shows the parameters for an action or lists al...
3
listNodes
Shows the host names used by the server
4
loadActionSet
Loads an action set for use in this session
5
installActionSet
Loads an action set in new sessions automatically
6
log
Shows and modifies logging levels
7
queryActionSet
Shows whether an action set is loaded
8
queryName
Checks whether a name is an action or action s...
9
reflect
Shows detailed parameter information for an ac...
10
serverStatus
Shows the status of the server
11
about
Shows the status of the server
12
shutdown
Shuts down the server
13
getUsers
Shows the users from the authentication provider
14
getGroups
Shows the groups from the authentication provider
15
userInfo
Shows the user information for your connection
16
actionSetInfo
Shows the build information from loaded action...
17
history
Shows the actions that were run in this session
18
casCommon
Provides parameters that are common to many ac...
19
ping
Sends a single request to the server to confir...
20
echo
Prints the supplied parameters to the client log
21
modifyQueue
Modifies the action response queue settings
22
getLicenseInfo
Shows the license information for a SAS product
23
refreshLicense
Refresh SAS license information from a file
24
httpAddress
Shows the HTTP address for the server monitor
§ configuration
name
description
0
getServOpt
displays the value of a server option
1
listServOpts
Displays the server options and server values
§ dataPreprocess
name
description
0
rustats
Computes robust univariate statistics, central...
1
impute
Performs data matrix (variable) imputation
2
outlier
Performs outlier detection and treatment
3
binning
Performs unsupervised variable discretization
4
discretize
Performs supervised and unsupervised variable ...
5
histogram
Generates histogram bins and simple bin-based ...
6
transform
Performs pipelined variable imputation, outlie...
7
kde
Computes kernel density estimation
§ dataStep
name
description
0
runCode
Runs DATA step code
§ percentile
name
description
0
percentile
Calculate quantiles and percentiles
1
boxPlot
Calculate quantiles, high and low whiskers, an...
2
assess
Assess and compare models
§ search
name
description
0
searchIndex
Searches for a query against an index and retr...
1
searchAggregate
Aggregates certain fields in a table that is u...
2
valueCount
value count for multiple fields
3
buildIndex
Creates an empty index using a schema (the fir...
4
getSchema
Gets the schema of an index
5
appendIndex
Loads data to an index after the buildIndex ac...
6
deleteDocuments
Delete a portion of documents from index
§ session
name
description
0
listSessions
Displays a list of the sessions on the server
1
addNodeStatus
Lists details about machines currently being a...
2
timeout
Changes the time-out for a session
3
endSession
Ends the current session
4
sessionId
Displays the name and UUID of the current session
5
sessionName
Changes the name of the current session
6
sessionStatus
Displays the status of the current session
7
listresults
Lists the saved results for a session
8
batchresults
Change current action to batch results
9
fetchresult
Fetch the specified saved result for a session
10
flushresult
Flush the saved result for this session
11
setLocale
Changes the locale for the current session
12
metrics
Displays the metrics for each action after it ...
§ sessionProp
name
description
0
setSessOpt
Sets a session option
1
getSessOpt
Displays the value of a session option
2
listSessOpts
Displays the session options and session values
3
addFmtLib
Adds a format library
4
listFmtLibs
Lists the format libraries that are associated...
5
setFmtSearch
Sets the format libraries to search
6
listFmtSearch
Shows the format library search order
7
dropFmtLib
Drops a format library from global scope for a...
8
deleteFormat
Deletes a format from a format library
9
addFormat
Adds a format to a format library
10
listFmtValues
Shows the values for a format
11
saveFmtLib
Saves a format library
12
promoteFmtLib
Promotes a format library to global scope for ...
13
listFmtRanges
Displays the range information for a format
§ simple
name
description
0
mdSummary
Calculates multidimensional summaries of numer...
1
numRows
Shows the number of rows in a Cloud Analytic S...
2
summary
Generates descriptive statistics of numeric va...
3
correlation
Generates a matrix of Pearson product-moment c...
4
regression
Performs a linear regression up to 3rd-order p...
5
crossTab
Performs one-way or two-way tabulations
6
distinct
Computes the distinct number of values of the ...
7
topK
Returns the top-K and bottom-K distinct values...
8
groupBy
Builds BY groups in terms of the variable valu...
9
freq
Generates a frequency distribution for one or ...
10
paraCoord
Generates a parallel coordinates plot of the v...
§ table
name
description
0
view
Creates a view from files or tables
1
attribute
Manages extended table attributes
2
upload
Transfers binary data to the server to create ...
3
loadTable
Loads a table from a caslib's data source
4
tableExists
Checks whether a table has been loaded
5
columnInfo
Shows column information
6
fetch
Fetches rows from a table or view
7
save
Saves a table to a caslib's data source
8
addTable
Add a table by sending it from the client to t...
9
tableInfo
Shows information about a table
10
tableDetails
Get detailed information about a table
11
dropTable
Drops a table
12
deleteSource
Delete a table or file from a caslib's data so...
13
fileInfo
Lists the files in a caslib's data source
14
promote
Promote a table to global scope
15
addCaslib
Adds a new caslib to enable access to a data s...
16
dropCaslib
Drops a caslib
17
caslibInfo
Shows caslib information
18
queryCaslib
Checks whether a caslib exists
19
partition
Partitions a table
20
recordCount
Shows the number of rows in a Cloud Analytic S...
21
loadDataSource
Loads one or more data source interfaces
22
update
Updates rows in a table
elapsed 0.0069s · user 0.004s · sys 0.005s · mem 0.192MB
In [4]:
# An action is addressed as session.actionset.action. Hand that object to
# Python's built-in help() to print its parameter documentation.
impute_action = s.dataPreprocess.impute
help(impute_action)
Help on datapreprocess.Impute in module swat.cas.actions object:
class datapreprocess.Impute(CASAction)
| Performs data matrix (variable) imputation
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nomVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies to treat the last nNomVars variables as nominal if you do
| not provide a value for the nomVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 5. The default value, 0, specifies not to use a
| percentile definition.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the vars parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True, techForCont is VALUE or RANDOM, and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| Impute object
|
| Method resolution order:
| datapreprocess.Impute
| CASAction
| swat.cas.utils.params.ParamManager
| builtins.object
|
| Methods defined here:
|
| __call__(_self_, table=None, methodcontinuous=None, methodnominal=None, inputs=None, nnominalvars=None, nominalvarsindices=None, freq=None, weight=None, seed=None, minrandom=None, maxrandom=None, percentiledefinition=None, percentilemaxiterations=None, percentiletolerance=None, valuescontinuous=None, valuesnominal=None, includemissinggroup=None, code=None, sasvarnamelength=None, casout=None, casoutimputeinformation=None, includeinputvars=None, copyallvars=None, outvarsnameprefix=None, outvarsnamesuffix=None, copyvars=None, forcemissingcount=None, outputtableoptions=None, **kwargs)
| Performs data matrix (variable) imputation
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nomVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies to treat the last nNomVars variables as nominal if you do
| not provide a value for the nomVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 6. The default definition is 6.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the inputs parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True while methodContinuous is VALUE or RANDOM and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| CASResults object
|
| __init__(_self_, table=None, methodcontinuous=None, methodnominal=None, inputs=None, nnominalvars=None, nominalvarsindices=None, freq=None, weight=None, seed=None, minrandom=None, maxrandom=None, percentiledefinition=None, percentilemaxiterations=None, percentiletolerance=None, valuescontinuous=None, valuesnominal=None, includemissinggroup=None, code=None, sasvarnamelength=None, casout=None, casoutimputeinformation=None, includeinputvars=None, copyallvars=None, outvarsnameprefix=None, outvarsnamesuffix=None, copyvars=None, forcemissingcount=None, outputtableoptions=None, **kwargs)
| Performs data matrix (variable) imputation
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, a transient table is not created in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nominalVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies that the last nNominalVars variables are treated as nominal if you do
| not provide a value for the nominalVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 6. The default definition is 6.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the inputs parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True while methodContinuous is VALUE or RANDOM and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| Impute object
|
| get_param(_self_, key)
| Get the value of an action parameter
|
| Parameters
| ----------
| key : string
| The fully-qualified name (e.g., table.name) of the parameter to retrieve.
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, a transient table is not created in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nominalVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies that the last nNominalVars variables are treated as nominal if you do
| not provide a value for the nominalVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 6. The default definition is 6.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the vars parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True, techForCont is VALUE or RANDOM, and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| any
| The value of the specified parameter.
|
| get_params(_self_, *keys)
| Get the value of one or more action parameters
|
| Parameters
| ----------
| *keys : one or more strings
| The fully-qualified names (e.g., table.name) of the parameters to retrieve.
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nomVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies to treat the last nNomVars variables as nominal if you do
| not provide a value for the nomVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 5. The default value, 0, specifies not to use a
| percentile definition.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the vars parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True, techForCont is VALUE or RANDOM, and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| dict
| A dictionary of key value pairs containing the requested parameters.
|
| set_param(_self_, *args, **kwargs)
| Set one or more action parameters
|
| Parameters
| ----------
| *args : string / any pairs, optional
| Parameters can be specified as fully-qualified names (e.g., table.name)
| and values as subsequent arguments. Any number of name / any pairs
| can be specified.
| **kwargs : any, optional
| Parameters can be specified as any number of keyword arguments.
|
| Examples
| --------
| #
| # String / any pairs
| #
| > summ = s.simple.Summary()
| > summ.set_param('table.name', 'iris',
| 'table.singlepass', True,
| 'casout.name', 'iris_summary')
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_summary'})
|
| #
| # Keywords
| #
| > summ.set_param(casout=dict(name='iris_out'))
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_out'})
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nomVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies to treat the last nNomVars variables as nominal if you do
| not provide a value for the nomVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 5. The default value, 0, specifies not to use a
| percentile definition.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the vars parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True, techForCont is VALUE or RANDOM, and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| None
|
| set_params(_self_, *args, **kwargs)
| Set one or more action parameters
|
| Parameters
| ----------
| *args : string / any pairs, optional
| Parameters can be specified as fully-qualified names (e.g., table.name)
| and values as subsequent arguments. Any number of name / any pairs
| can be specified.
| **kwargs : any, optional
| Parameters can be specified as any number of keyword arguments.
|
| Examples
| --------
| #
| # String / any pairs
| #
| > summ = s.simple.Summary()
| > summ.set_param('table.name', 'iris',
| 'table.singlepass', True,
| 'casout.name', 'iris_summary')
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_summary'})
|
| #
| # Keywords
| #
| > summ.set_param(casout=dict(name='iris_out'))
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_out'})
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the table name, caslib, and other common parameters.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.groupby : list of dicts, optional
| specifies the names of the variables to use for grouping
| results.
|
| table.groupby[*].name : string
| specifies the name for the variable.
|
| table.groupby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.groupby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.groupby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.groupby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.groupby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.orderby : list of dicts, optional
| specifies the variables to use for ordering observations within
| partitions. This parameter applies to partitioned tables or it
| can be combined with groupBy variables when groupByMode is set to
| REDISTRIBUTE.
|
| table.orderby[*].name : string
| specifies the name for the variable.
|
| table.orderby[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.orderby[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.orderby[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.orderby[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.orderby[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.groupbymode : string, optional
| specifies how the server creates groups.
| Default: NOSORT
| Values: NOSORT, REDISTRIBUTE
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| methodcontinuous : string, optional
| specifies the imputation technique for continuous variables. Be
| aware that you can specify numeric variables as nominal using the
| nomVarsIndices parameter.
| Default: MEAN
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| methodnominal : string, optional
| specifies the imputation technique for nominal variables.
| Default: MODE
| Values: MAX, MEAN, MEDIAN, MIDRANGE, MIN, MODE, RANDOM, VALUE
|
| inputs : list of dicts, optional
| specifies the variables to use for the analysis. You can specify a
| subset of the variables from the input table.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nnominalvars : int32, optional
| specifies to treat the last nNomVars variables as nominal if you do
| not provide a value for the nomVarsIndices parameter.
| Default: 0
| Note: Value range is 0 < n < 2147483647
|
| nominalvarsindices : list, optional
| specifies the indices of the variables to treat as nominal
| variables.
| Default: []
| Note: Value range is 0 < n < inf
|
| freq : string, optional
| specifies the frequency variable.
|
| weight : string, optional
| specifies the weight variable.
|
| seed : int32, optional
| specifies a seed value. The seed is used to generate random values.
| Default: 0
|
| minrandom : double, optional
| specifies the minimum random number to generate.
| Default: 0.0
|
| maxrandom : double, optional
| specifies the maximum random number to generate.
| Default: 0.0
|
| percentiledefinition : int32, optional
| specifies the percentile definition to use. The definitions are
| numbered 1 to 5. The default value, 0, specifies not to use a
| percentile definition.
| Default: 6
| Note: Value range is 1 <= n <= 6
|
| percentilemaxiterations : int32, optional
| specifies the maximum number of iterations for percentile
| computation.
| Default: 0
|
| percentiletolerance : double, optional
| specifies the tolerance for percentile computation.
| Default: 1e-05
|
| valuescontinuous : list, optional
| specifies a list of double values for imputation for the continuous
| variables.
| Default: []
|
| valuesnominal : list, optional
| specifies a list of string values for imputation for the nominal
| variables.
| Default: []
|
| includemissinggroup : boolean, optional
| when set to True, missing values are allowed as group-by keys.
| Default: False
|
| code : dict, optional
| specifies the settings for generating SAS DATA step scoring code.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| sasvarnamelength : boolean, optional
| when set to True, the lengths of the names of the output variables
| are constrained to be less than or equal to 32 characters.
| Default: False
|
| casout : dict or CASTable, optional
| scores the input table and saves the scoring results as a table.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| casoutimputeinformation : dict or CASTable, optional
| specifies the settings for an output table that includes information
| about the results of the impute action.
|
| casoutimputeinformation.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casoutimputeinformation.caslib : string, optional
| specifies the name of the caslib to use.
|
| casoutimputeinformation.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casoutimputeinformation.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casoutimputeinformation.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casoutimputeinformation.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casoutimputeinformation.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casoutimputeinformation.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casoutimputeinformation.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casoutimputeinformation.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| includeinputvars : boolean, optional
| when set to True, the analysis variables from the input table that
| are specified in the vars parameter are copied to the output table.
| Default: False
|
| copyallvars : boolean, optional
| when set to True, all the variables from the input table are copied
| to the scored output table.
| Default: False
|
| outvarsnameprefix : string, optional
| specifies a prefix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the suffix
| parameter at the same time.
| Default: imp
|
| outvarsnamesuffix : string, optional
| specifies a suffix to apply to the names of output variables. If a
| variable named 'x' results in a new variable, the generated name is
| <prefix>_x_<suffix>. You can use this parameter and the prefix
| parameter at the same time.
| Default:
|
| copyvars : list of strings, optional
| specifies the names of variables in the input table to use for
| identifying scored observations in the output table. The specified
| variables are copied to the output table.
| Default: []
|
| forcemissingcount : boolean, optional
| when set to True, techForCont is VALUE or RANDOM, and casOut is not
| specified, the server returns the row count and missing count. This
| is done even if it requires an additional pass through the data.
| Leaving it False is efficient for large tables.
| Default: False
|
| outputtableoptions : dict, optional
| specifies options for result tables. You can specify which result
| tables the server returns and how group-by results are handled.
|
| outputtableoptions.tablenames : list, optional
| specifies the names of result tables to generate. By default,
| all result tables are returned.
| Default: []
|
| outputtableoptions.forcetablereturn : boolean, optional
| when set to True, result tables are returned to the client even
| if the output is also saved as an output table.
| Default: False
|
| Returns
| -------
| None
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| all_params = {'casout', 'casout.caslib', 'casout.compress', 'casout.la...
|
| param_names = ['table', 'methodcontinuous', 'methodnominal', 'inputs',...
|
| ----------------------------------------------------------------------
| Methods inherited from CASAction:
|
| __iter__(self)
| Call the action and iterate over the results
|
| invoke(self, **kwargs)
| Invoke the action
|
| Parameters
| ----------
| **kwargs : any, optional
| Arbitrary key/value pairs to add to the arguments sent to the
| action. These key/value pairs are not added to the collection
| of parameters set on the action object. They are only used in
| this call.
|
| Returns
| -------
| self
| Returns the CASAction object itself
|
| retrieve = __call__(self, **kwargs)
| Call the action
|
| Parameters
| ----------
| **kwargs : any, optional
| Arbitrary key/value pairs to add to the arguments sent to the
| action. These key/value pairs are not added to the collection
| of parameters set on the action object. They are only used in
| this call.
|
| Returns
| -------
| CASResults object
| Collection of results from the action call
|
| ----------------------------------------------------------------------
| Class methods inherited from CASAction:
|
| from_reflection(asname, actinfo, connection) from builtins.type
| Construct a CASAction class from reflection information
|
| Parameters
| ----------
| asname : string
| The action set name
| actinfo : dict
| The reflection information for the action
| connection : CAS object
| The connection to associate with the CASAction
| defaults : dict
| Default parameters for the action
|
| Returns
| -------
| CASAction class
|
| get_connection() from builtins.type
| Return the registered connection
|
| The connection is only held by a weak reference. If the
| connection no longer exists, a SWATError is raised.
|
| Raises
| ------
| SWATError
| If the registered connection no longer exists
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from CASAction:
|
| trait_names = None
|
| ----------------------------------------------------------------------
| Methods inherited from swat.cas.utils.params.ParamManager:
|
| __delattr__(self, name)
| Delete an attribute
|
| __enter__(self)
|
| __exit__(self, type, value, traceback)
|
| __getattr__(self, name)
| Get named attribute
|
| __repr__(self)
| Return repr(self).
|
| __setattr__(self, name, value)
| Set an attribute
|
| __str__(self)
| Return str(self).
|
| del_param = del_params(self, *keys)
| Delete parameters
|
| Parameters
| ----------
| *keys : strings
| Names of parameters to delete
|
| Returns
| -------
| None
|
| del_params(self, *keys)
| Delete parameters
|
| Parameters
| ----------
| *keys : strings
| Names of parameters to delete
|
| Returns
| -------
| None
|
| has_param = has_params(self, *keys)
| Return a boolean indicating whether or not the parameters exist
|
| Parameters
| ----------
| *keys : one or more strings
| Names of parameters
|
| Returns
| -------
| True or False
|
| has_params(self, *keys)
| Return a boolean indicating whether or not the parameters exist
|
| Parameters
| ----------
| *keys : one or more strings
| Names of parameters
|
| Returns
| -------
| True or False
|
| to_dict(self)
| Return the parameters as a dictionary
|
| to_json(self, *args, **kwargs)
| Convert parameters to JSON
|
| Parameters
| ----------
| *args : any, optional
| Additional arguments to json.dumps
| **kwargs : any, optional
| Additional arguments to json.dumps
|
| Returns
| -------
| string
|
| to_params = to_dict(self)
| Return the parameters as a dictionary
|
| ----------------------------------------------------------------------
| Data descriptors inherited from swat.cas.utils.params.ParamManager:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
In [5]:
# show every action set known to the server, including those not yet loaded
s.builtins.actionSetInfo(all=True)
Out[5]:
§ setinfo
Action set information
actionset
label
loaded
extension
build_time
portdate
0
access
0
tkacon
2016-09-07 18:41:50
V.03.01M0P09072016
1
accessControl
Access Controls
1
casmeta
2016-09-07 18:41:50
V.03.01M0P09072016
2
aggregation
0
tkcasagg
2016-09-07 18:41:47
V.03.01M0P09072016
3
astore
0
astore
2016-09-07 17:51:14
V.03.01M0P09072016
4
autotune
0
optminer
2016-09-07 19:33:09
V.03.01M0P09072016
5
boolRule
0
casblr
2016-09-07 19:00:58
V.03.01M0P09072016
6
builtins
System
1
tkcasablt
2016-09-07 18:41:50
V.03.01M0P09072016
7
cardinality
0
cardinality
2016-09-16 08:50:17
V.03.01M0P09072016
8
clustering
0
tkcaskclus
2016-09-07 17:51:17
V.03.01M0P09072016
9
configuration
Server Properties
1
tkcascfg
2016-09-07 18:41:47
V.03.01M0P09072016
10
dataPreprocess
Data Preprocess
1
tktrans
2016-09-07 18:41:49
V.03.01M0P09072016
11
dataStep
DATA Step
1
datastep
2016-09-07 18:41:27
V.03.01M0P09072016
12
decisionTree
0
tkcasdt
2016-09-07 18:41:49
V.03.01M0P09072016
13
ds2
0
casds2
2016-09-07 19:27:58
V.03.01M0P09072016
14
factmac
0
tkfactmac
2016-09-07 17:51:15
V.03.01M0P09072016
15
fedSql
0
casfedsql
2016-09-07 19:27:58
V.03.01M0P09072016
16
hiddenMarkovModel
0
hmm
2016-09-07 18:12:07
V.03.01M0P09072016
17
hyperGroup
0
tkhypgrp
2016-09-07 18:41:47
V.03.01M0P09072016
18
image
0
tkimage
2016-09-07 17:51:10
V.03.01M0P09072016
19
loadStreams
0
espact
2016-09-07 18:41:49
V.03.01M0P09072016
20
midTierServices
0
mtpsrvc
2016-09-07 18:12:18
V.03.01M0P09072016
21
network
Network
0
networkcommon
2016-09-07 19:27:44
V.03.01M0P09072016
22
network
Network
0
networksocial
2016-09-07 19:27:44
V.03.01M0P09072016
23
neuralNet
0
tkcasann
2016-09-07 18:41:49
V.03.01M0P09072016
24
nonlinear
0
nlmcas
2016-09-07 17:51:15
V.03.01M0P09072016
25
panel
0
Panel
2016-09-07 17:51:13
V.03.01M0P09072016
26
pca
0
tkpca
2016-09-07 17:51:18
V.03.01M0P09072016
27
percentile
Percentile
1
tkcasptl
2016-09-07 18:41:49
V.03.01M0P09072016
28
pls
0
pls
2016-09-07 17:51:13
V.03.01M0P09072016
29
quantreg
0
rqscas
2016-09-07 17:51:13
V.03.01M0P09072016
30
recommend
0
tkrecom
2016-09-07 18:41:49
V.03.01M0P09072016
31
regression
0
regcas
2016-09-14 13:28:39
V.03.01M0P09072016
32
sampling
0
tkcassamp
2016-09-07 17:51:16
V.03.01M0P09072016
33
search
Search
1
casidx
2016-09-07 18:12:22
V.03.01M0P09072016
34
sentimentAnalysis
0
cassent
2016-09-07 18:12:22
V.03.01M0P09072016
35
sequence
0
tksequ
2016-09-07 18:41:47
V.03.01M0P09072016
36
session
Session Methods
1
tkcsessn
2016-09-07 18:41:49
V.03.01M0P09072016
37
sessionProp
Session Properties
1
tkcstate
2016-09-07 18:41:49
V.03.01M0P09072016
38
sgComp
0
sgcomp
2016-09-07 17:51:12
V.03.01M0P09072016
39
simple
Analytics
1
tkimstat
2016-09-07 18:41:49
V.03.01M0P09072016
40
svm
0
tkaasvm
2016-09-07 17:51:17
V.03.01M0P09072016
41
table
Tables
1
tkcastab
2016-09-13 12:24:04
V.03.01M0P09072016
42
textMining
0
castmine
2016-09-07 19:00:58
V.03.01M0P09072016
43
textParse
0
casparse
2016-09-07 18:12:21
V.03.01M0P09072016
44
transpose
0
castranspose
2016-09-07 17:51:11
V.03.01M0P09072016
45
varReduce
0
tkaareduce
2016-09-07 17:51:16
V.03.01M0P09072016
46
copula
CAS Copula Simulation Action Library
0
copula
2016-09-07 17:51:13
V.03.01M0P09072016
47
panel
Panel Data
0
panel
2016-09-07 17:51:13
V.03.01M0P09072016
48
cmpcas
0
cmpcas
2016-09-07 18:45:26
V.03.01M0P09072016
49
casfors
Simple forecast service
0
casfors
2016-09-07 17:51:18
V.03.01M0P09072016
50
tkovrd
Forecast override
0
tkovrd
2016-09-07 17:51:14
V.03.01M0P09072016
51
mdchoice
MDCHOICE CAS Action Library
0
mdchoice
2016-09-07 17:51:13
V.03.01M0P09072016
52
localsearch
Local Search Optimization
0
localsearch
2016-09-07 19:33:08
V.03.01M0P09072016
53
optimization
Optimization
0
optimization
2016-09-07 19:33:09
V.03.01M0P09072016
54
qlimreg
QLIMREG CAS Action Library
0
qlimreg
2016-09-07 18:12:07
V.03.01M0P09072016
55
tkcsestst
Session Tests
0
tkcsestst
2016-09-07 18:41:49
V.03.01M0P09072016
56
tkdnn
DeepNeural
0
tkdnn
2016-09-07 17:51:13
V.03.01M0P09072016
elapsed 0.193s · user 0.377s · sys 0.337s · mem 0.558MB
In [6]:
# make the decisionTree action set available in this session
s.builtins.loadActionSet("decisionTree")
# request help again and display only the actions of the decisionTree set
s.help().decisionTree
Out[6]:
name
description
0
dtreeTrain
Train a decision tree
1
dtreeScore
Score a table using a decision tree model
2
dtreeSplit
Split decision tree nodes
3
dtreePrune
Prune a decision tree
4
dtreeMerge
Merge decision tree nodes
5
dtreeCode
Generate DATA step scoring code from a decisio...
6
forestTrain
Train a forest
7
forestScore
Score a table using a forest model
8
forestCode
Generate DATA step scoring code from a forest ...
9
gbtreeTrain
Train a gradient boosting tree
10
gbtreeScore
Score a table using a gradient boosting tree m...
11
gbtreecode
Generate DATA step scoring code from a gradien...
In [7]:
# show the Python help (parameters and methods) for the gbtreeTrain action
help(s.decisionTree.gbtreeTrain)
Help on decisiontree.Gbtreetrain in module swat.cas.actions object:
class decisiontree.Gbtreetrain(CASAction)
| Train a gradient boosting tree
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in normal=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of the tree level.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables is used, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| Gbtreetrain object
|
| Method resolution order:
| decisiontree.Gbtreetrain
| CASAction
| swat.cas.utils.params.ParamManager
| builtins.object
|
| Methods defined here:
|
| __call__(_self_, table=None, target=None, inputs=None, nominals=None, attributes=None, nbins=None, maxlevel=None, maxbranch=None, leafsize=None, missing=None, minuseinsearch=None, greedy=None, binorder=None, varimp=None, code=None, modelid=None, casout=None, mergebin=None, includemissing=None, ntree=None, seed=None, learningrate=None, subsamplerate=None, distribution=None, m=None, lasso=None, ridge=None, singular=None, **kwargs)
| Train a gradient boosting tree
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in normal=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of the tree level.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables is used, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| CASResults object
|
| __init__(_self_, table=None, target=None, inputs=None, nominals=None, attributes=None, nbins=None, maxlevel=None, maxbranch=None, leafsize=None, missing=None, minuseinsearch=None, greedy=None, binorder=None, varimp=None, code=None, modelid=None, casout=None, mergebin=None, includemissing=None, ntree=None, seed=None, learningrate=None, subsamplerate=None, distribution=None, m=None, lasso=None, ridge=None, singular=None, **kwargs)
| Train a gradient boosting tree
|
| Parameters
| ----------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in normal=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of levels in the tree.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| Gbtreetrain object
|
| get_param(_self_, key)
| Get the value of an action parameter
|
| Parameters
| ----------
| key : string
| The fully-qualified name (e.g., table.name) of the parameter to retrieve.
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in nominals=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of levels in the tree.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| any
| The value of the specified parameter.
|
| get_params(_self_, *keys)
| Get the value of one or more action parameters
|
| Parameters
| ----------
| *keys : one or more strings
| The fully-qualified names (e.g., table.name) of the parameters to retrieve.
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in nominals=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of levels in the tree.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| dict
| A dictionary of key value pairs containing the requested parameters.
|
| set_param(_self_, *args, **kwargs)
| Set one or more action parameters
|
| Parameters
| ----------
| *args : string / any pairs, optional
| Parameters can be specified as fully-qualified names (e.g., table.name)
| and values as subsequent arguments. Any number of name / any pairs
| can be specified.
| **kwargs : any, optional
| Parameters can be specified as any number of keyword arguments.
|
| Examples
| --------
| #
| # String / any pairs
| #
| > summ = s.simple.Summary()
| > summ.set_param('table.name', 'iris',
| 'table.singlepass', True,
| 'casout.name', 'iris_summary')
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_summary'})
|
| #
| # Keywords
| #
| > summ.set_param(casout=dict(name='iris_out'))
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_out'})
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in nominals=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of levels in the tree.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| None
|
| set_params(_self_, *args, **kwargs)
| Set one or more action parameters
|
| Parameters
| ----------
| *args : string / any pairs, optional
| Parameters can be specified as fully-qualified names (e.g., table.name)
| and values as subsequent arguments. Any number of name / any pairs
| can be specified.
| **kwargs : any, optional
| Parameters can be specified as any number of keyword arguments.
|
| Examples
| --------
| #
| # String / any pairs
| #
| > summ = s.simple.Summary()
| > summ.set_param('table.name', 'iris',
| 'table.singlepass', True,
| 'casout.name', 'iris_summary')
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_summary'})
|
| #
| # Keywords
| #
| > summ.set_param(casout=dict(name='iris_out'))
| > print(summ)
| ?.simple.Summary(table={'name': 'iris', 'singlepass': True},
| casout={'name': 'iris_out'})
|
| Valid Parameters
| ----------------
| table : dict or CASTable
| specifies the settings for an input table.
|
| table.name : string or CASTable
| specifies the name of the table to use.
|
| table.caslib : string, optional
| specifies the caslib containing the table that you want to use
| with the action. By default, the active caslib is used. Specify a
| value only if you need to access a table from a different caslib.
|
| table.where : string, optional
| specifies an expression for subsetting the input data.
|
| table.computedvars : list of dicts, optional
| specifies the names of the computed variables to create. Specify
| an expression for each variable in the computedVarsProgram
| parameter.
|
| table.computedvars[*].name : string
| specifies the name for the variable.
|
| table.computedvars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.computedvars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.computedvars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.computedvars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.computedvars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| table.computedvarsprogram : string, optional
| specifies an expression for each computed variable that you
| include in the computedVars parameter.
|
| table.computedondemand : boolean, optional
| when set to True, the computed variables are created when the
| table is loaded instead of when the action begins.
| Default: False
|
| table.singlepass : boolean, optional
| when set to True, the data does not create a transient table in
| the server. Setting this parameter to True can be efficient, but
| the data might not have stable ordering upon repeated runs.
| Default: False
|
| table.importoptions : dict, optional
| specifies the settings for reading a table from a data source.
|
| table.importoptions.filetype : string
| Default: auto
| Values: auto, hdat, csv, delimited, excel, jmp, spss, dta,
| esp, lasr, basesas, mva, xls, fmt
|
| table.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| table.vars : list of dicts, optional
| specifies the variables to use in the action.
|
| table.vars[*].name : string
| specifies the name for the variable.
|
| table.vars[*].label : string, optional
| specifies the descriptive label for the variable.
|
| table.vars[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| table.vars[*].format : string, optional
| specifies the format to apply to the variable.
|
| table.vars[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| table.vars[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| target : string
| specifies the target or response variable for training the decision
| tree. If the variable is numeric, but not specified in nominals=, and
| nbinstarget= is not specified, then a regression tree is trained.
|
| inputs : list of dicts
| specifies the input variables to use in the analysis.
|
| inputs[*].name : string
| specifies the name for the variable.
|
| inputs[*].label : string, optional
| specifies the descriptive label for the variable.
|
| inputs[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| inputs[*].format : string, optional
| specifies the format to apply to the variable.
|
| inputs[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| inputs[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nominals : list of dicts, optional
| specifies the nominal input variables to use in the analysis.
|
| nominals[*].name : string
| specifies the name for the variable.
|
| nominals[*].label : string, optional
| specifies the descriptive label for the variable.
|
| nominals[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| nominals[*].format : string, optional
| specifies the format to apply to the variable.
|
| nominals[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| nominals[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| attributes : list of dicts, optional
| specifies temporary attributes, such as a format, to apply to input
| variables.
|
| attributes[*].name : string
| specifies the name for the variable.
|
| attributes[*].label : string, optional
| specifies the descriptive label for the variable.
|
| attributes[*].formattedlength : int32, optional
| specifies the format field length plus the format precision
| length.
| Default: 0
|
| attributes[*].format : string, optional
| specifies the format to apply to the variable.
|
| attributes[*].nfl : int32, optional
| specifies the format field length.
| Default: 0
|
| attributes[*].nfd : int32, optional
| specifies the format precision length.
| Default: 0
|
| nbins : int32, optional
| specifies the number of bins to use for numeric variables in the
| calculation of the decision tree.
| Default: 20
| Note: Value range is 1 <= n < 2147483647
|
| maxlevel : int32, optional
| specifies the maximum number of levels in the tree.
| Default: 6
| Note: Value range is 1 <= n < 2147483647
|
| maxbranch : int32, optional
| specifies the maximum number of children (branches) allowed for each
| level of the tree.
| Default: 2
| Note: Value range is 1 <= n < 2147483647
|
| leafsize : int32, optional
| specifies the minimum number of observations on each node.
| Default: 5
| Note: Value range is 1 <= n < 2147483647
|
| missing : string, optional
| specifies the missing policy to handle missing values.
| Default: MACSMALL
| Values: MACSMALL, USEINSEARCH
|
| minuseinsearch : int32, optional
| specifies a threshold for utilizing missing values in the split
| search when the missing parameter is set to USEINSEARCH. If the
| number of observations in which the splitting variable has missing
| values in a node is greater than or equal to the specified value,
| then the action initiates the USEINSEARCH policy. Otherwise, the
| missing values are assigned to a popular branch.
| Default: 1
|
| greedy : boolean, optional
| by default, a greedy search or exhaustive search is used to
| determine the best split for each variable of each tree node. When
| set to False, a fast and efficient algorithm that is based on
| clustering is applied. Setting this parameter to False is recommended
| for variables with high cardinality.
| Default: True
|
| binorder : boolean, optional
| by default, the bin order is preserved for numeric variables. When
| set to False, the bin order is ignored for numeric variables.
| Default: True
|
| varimp : boolean, optional
| specifies whether the variable importance information is generated.
| The importance value is determined by the total Gini reduction.
| Default: False
|
| code : dict, optional
| requests that the action produce SAS score code. Specify additional
| parameters.
|
| code.linesize : int32, optional
| specifies the line size for the generated code.
| Default: 120
| Note: Value range is 64 <= n <= 254
|
| code.fmtwdth : int32, optional
| specifies the width to use for formatting derived numbers such
| as parameter estimates in the DATA step code.
| Default: 20
| Note: Value range is 0 <= n <= 32
|
| code.indentsize : int32, optional
| specifies the number of spaces to indent the DATA step code for
| each indent level.
| Default: 3
| Note: Value range is 0 <= n <= 10
|
| code.labelid : int32, optional
| specifies the label ID to use in array names and statement
| labels in the DATA step code. By default, a random positive
| integer is used.
| Default: 0
|
| code.notrim : boolean, optional
| requests that the comparison of variables with formatted values
| be based on the full format width, with padding. By default,
| leading and trailing blanks are removed from the formatted
| values.
| Default: False
|
| code.comment : boolean, optional
| when set to True, adds comments to the DATA step code.
| Default: False
|
| code.tabform : boolean, optional
| Table format
| Default: False
|
| modelid : string, optional
| specifies the model ID variable name to use when generating SAS
| score code. By default, DT_ is prefixed to the target variable name.
|
| casout : dict or CASTable, optional
| specifies the table to store the decision tree model in. When not
| specified, a random name is generated.
|
| casout.name : string or CASTable, optional
| specifies the name to associate with the table.
|
| casout.caslib : string, optional
| specifies the name of the caslib to use.
|
| casout.timestamp : string, optional
| specifies the timestamp to apply to the table. Specify the value
| in the form that is appropriate for your session locale.
|
| casout.compress : boolean, optional
| when set to True, data compression is applied to the table.
| Default: False
|
| casout.replace : boolean, optional
| specifies whether to overwrite an existing table with the same
| name.
| Default: False
|
| casout.replication : int32, optional
| specifies the number of copies of the table to make for fault
| tolerance. Larger values result in slower performance and use
| more memory, but provide high availability for data in the event
| of a node failure.
| Default: 1
| Note: Value range is 0 <= n < 2147483647
|
| casout.label : string, optional
| specifies the descriptive label to associate with the table.
|
| casout.maxmemsize : int64, optional
| specifies the maximum amount of physical memory, in bytes, to
| allocate for the table. After this threshold is reached, the
| server uses temporary files and operating system facilities for
| memory management.
| Default: 0
|
| casout.promote : boolean, optional
| when set to True, the output table is added with a global scope.
| This enables other sessions to access the table, subject to
| access controls. The target caslib must also have a global scope.
| Default: False
|
| casout.ondemand : boolean, optional
| when set to True, table access is less aggressive with virtual
| memory use.
| Default: True
|
| mergebin : boolean, optional
| by default, when the largest value in one bin matches the lowest
| value in a neighboring bin, the values are merged into the lower bin.
| When set to False, the action does not try to merge bins.
| Default: True
|
| includemissing : boolean, optional
| by default, observations with missing values are included. When set
| to False, observations with missing values for the variables used in
| the tree model are ignored when scoring.
| Default: True
|
| ntree : int32, optional
| specifies the number of trees to create.
| Default: 50
| Note: Value range is 1 <= n < 2147483647
|
| seed : double, optional
| specifies the seed for the random number generator. By default, the
| random number stream is based on the computer clock. Negative values
| also result in random number streams based on the computer clock. If
| you want a reproducible random number sequence between runs, specify
| a value that is greater than zero.
| Default: 0.0
| Note: Value range is 0.0 <= n <= 2147483647.0
|
| learningrate : double, optional
| specifies the learning rate of each tree.
| Default: 0.1
| Note: Value range is 0.0 < n <= 1.0
|
| subsamplerate : double, optional
| specifies the fraction of the data to use for building each tree.
| Default: 0.5
| Note: Value range is 0.0 < n <= 1.0
|
| distribution : string or int64, optional
| specifies the distribution in gradient boosting tree.
|
| m : int32, optional
| specifies the number of input variables to consider for splitting on
| a node. The variables are selected at random from the input variables
| for each tree. By default, forest uses the square root of the number
| of input variables, rounded up to the nearest integer. For
| gradient boosting tree, the number of input variables is used.
| Default: 0
| Note: Value range is 1 <= n < 2147483647
|
| lasso : double, optional
| specifies the L1 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| ridge : double, optional
| specifies the L2 norm regularization on prediction. The value must
| be greater than or equal to zero.
| Default: 0.0
| Note: Value range is 0.0 <= n < max-double
|
| singular : double, optional
| specifies a small value to avoid zero in division.
| Default: 1e-12
| Note: Value range is 0.0 <= n < max-double
|
| Returns
| -------
| None
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| all_params = {'attributes', 'attributes[*].format', 'attributes[*].for...
|
| param_names = ['table', 'target', 'inputs', 'nominals', 'attributes', ...
|
| ----------------------------------------------------------------------
| Methods inherited from CASAction:
|
| __iter__(self)
| Call the action and iterate over the results
|
| invoke(self, **kwargs)
| Invoke the action
|
| Parameters
| ----------
| **kwargs : any, optional
| Arbitrary key/value pairs to add to the arguments sent to the
| action. These key/value pairs are not added to the collection
| of parameters set on the action object. They are only used in
| this call.
|
| Returns
| -------
| self
| Returns the CASAction object itself
|
| retrieve = __call__(self, **kwargs)
| Call the action
|
| Parameters
| ----------
| **kwargs : any, optional
| Arbitrary key/value pairs to add to the arguments sent to the
| action. These key/value pairs are not added to the collection
| of parameters set on the action object. They are only used in
| this call.
|
| Returns
| -------
| CASResults object
| Collection of results from the action call
|
| ----------------------------------------------------------------------
| Class methods inherited from CASAction:
|
| from_reflection(asname, actinfo, connection) from builtins.type
| Construct a CASAction class from reflection information
|
| Parameters
| ----------
| asname : string
| The action set name
| actinfo : dict
| The reflection information for the action
| connection : CAS object
| The connection to associate with the CASAction
| defaults : dict
| Default parameters for the action
|
| Returns
| -------
| CASAction class
|
| get_connection() from builtins.type
| Return the registered connection
|
| The connection is only held by a weak reference. If the
| connection no longer exists, a SWATError is raised.
|
| Raises
| ------
| SWATError
| If the registered connection no longer exists
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from CASAction:
|
| trait_names = None
|
| ----------------------------------------------------------------------
| Methods inherited from swat.cas.utils.params.ParamManager:
|
| __delattr__(self, name)
| Delete an attribute
|
| __enter__(self)
|
| __exit__(self, type, value, traceback)
|
| __getattr__(self, name)
| Get named attribute
|
| __repr__(self)
| Return repr(self).
|
| __setattr__(self, name, value)
| Set an attribute
|
| __str__(self)
| Return str(self).
|
| del_param = del_params(self, *keys)
| Delete parameters
|
| Parameters
| ----------
| *keys : strings
| Names of parameters to delete
|
| Returns
| -------
| None
|
| del_params(self, *keys)
| Delete parameters
|
| Parameters
| ----------
| *keys : strings
| Names of parameters to delete
|
| Returns
| -------
| None
|
| has_param = has_params(self, *keys)
| Return a boolean indicating whether or not the parameters exist
|
| Parameters
| ----------
| *keys : one or more strings
| Names of parameters
|
| Returns
| -------
| True or False
|
| has_params(self, *keys)
| Return a boolean indicating whether or not the parameters exist
|
| Parameters
| ----------
| *keys : one or more strings
| Names of parameters
|
| Returns
| -------
| True or False
|
| to_dict(self)
| Return the parameters as a dictionary
|
| to_json(self, *args, **kwargs)
| Convert parameters to JSON
|
| Parameters
| ----------
| *args : any, optional
| Additional arguments to json.dumps
| **kwargs : any, optional
| Additional arguments to json.dumps
|
| Returns
| -------
| string
|
| to_params = to_dict(self)
| Return the parameters as a dictionary
|
| ----------------------------------------------------------------------
| Data descriptors inherited from swat.cas.utils.params.ParamManager:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
In [8]:
s.session.endsession() # end the session
Out[8]:
elapsed 0.00392s · user 0.000999s · sys 0.003s · mem 0.116MB
Content source: sassoftware/sas-viya-programming
Similar notebooks: